home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
9-Digit Zip Code Directory
/
9-Digit Zip Code Directory (American Business Information) (ABIZIP-12).ISO
/
z4src.zip
/
DASORT.C
< prev
next >
Wrap
C/C++ Source or Header
|
1995-09-12
|
19KB
|
637 lines
//----------------------------------------------------------------------------
// MODULE DESCRIPTION
//
// Module: dasort.c
// Title: Data File Build Utility Library
// Notice: John M. Weeder
// Copyright (c) 1993. All rights reserved.
// This module contains proprietary information and should be
// treated as confidential.
//
//----------------------------------------------------------------------------
// MAINTENANCE HISTORY
//
// $Workfile$
// $Revision$
// $Author$
// $Date$
// $Log$
//
//----------------------------------------------------------------------------
// MODULE NARRATIVE
//
//
// This module contains sort the index file of the delimited data file.
//
// Sort methodology:
// Assume there are N records
// Assign a fixed length key of size K to each record
// Define a sort area size of S bytes.
// The sort area will hold s = S / K records.
// Define P = (N / s) + 1 as the number of passes to be made. This is the
// number of sort buffers needed to hold all the records.
// If P > s, then abort because the final merge will not work
// efficiently.
// Make P passes at the data file. Each time sort the next
// s records in the file (in place). This is done by reading s keys
// into the sort buffer and using QuickSort to sort them. They are then
// written back to disk.
// Use the sort buffer as a cache. Read the first record from each sort
// pass into the buffer (P records). Select lowest record and write it
// to a temporary file.
// Update the cache with the next record from the pass from which the
// previous record was selected. Keep doing this until all records are
// processed.
// Delete current index and rename temporary index to be the current index.
//
// The code in this module should be written entirely in C.
// Do not use any C++ constructs.
//
// This module is portable to:
// DOS 3.X+
// MS Windows 3.X+
// OS/2 2.X+
// OS/2 2.0 PM
// SCO UNIX.
//
// The following compilers are supported:
// MSC 6.0A
// MSC/C++ 7.0
// Borland C++ 3.1 for DOS
// Borland C++ 1.0 for OS/2 2.X
// SCO UNIX cc
//
//----------------------------------------------------------------------------
#include <da.h>
//----------------------------------------------------------------------------
// Global data
//----------------------------------------------------------------------------
typedef struct _G // Sort data
{
PDATACFG pcfg; // Configuration data
DLM dlm; // Delimited file data.
HF hfTemp; // Temporary file handle
CHAR szTemp[MAX_PATH]; // Temporary file name
SIZET cKeyLen; // Length of key
SIZET cKeySort; // Size of key to sort with
SIZET cRecsPerPart; // Max records per partition//CHGD TO LONG
SIZET cPasses; // Number of passes to make
LONG lRecs; // Records in file
LONG lStart; // Starting record in this partition
LONG lEnd; // Ending record in this partition
PLONG plRecord; // Pointer to cache record indicator
PBYTE pbPartition; // Sort partition buffer
BOOL fSorted; // File sorted indicator
PBYTE pbWrite; // Index key write buffer
PBYTE pbWriteNext; // Next space in buffer
SIZET cbWrite; // Number of records in buffer
PSIZET pcMerge; // Next space in buffer
} G;
BASETYPE(G);
static G g;
#define MAX_WRITE_BUF (1024)
#define FL_CF (FL_TRUNCATE|FL_CREATE|FL_READWRITE\
|FL_DENYREADWRITE|FL_BINARY)
//----------------------------------------------------------------------------
// Prototypes
//----------------------------------------------------------------------------
static int __keysort__(const void *, const void *);
static BOOL FN_L DataSortFile(void);
static BOOL FN_L DataSortInitialize(void);
static BOOL FN_L DataSortMerge(void);
static BOOL FN_L DataSortMergeProcess(void);
static BOOL FN_L DataSortMergeRead(SIZET, BOOL);
static BOOL FN_L DataSortMergeWrite(PDLMNDX);
static SHORT FN_L DataSortPartition(void);
static BOOL FN_L DataSortTerminate(void);
static BOOL FN_L DataSortWrite(void);
//----------------------------------------------------------------------------
// Description: Sort routine used by qsort to sort keys
// Parameters: see qsort()
// Returns: see qsort()
//----------------------------------------------------------------------------
static int __keysort__(const void *pv1, const void *pv2)
{
return memcmp(pv1, pv2, g.cKeySort);
}
//----------------------------------------------------------------------------
// Description: Sort a delimited data file.
// Parameters: pcfg Configuration file data.
// Returns: TRUE if successful.
//----------------------------------------------------------------------------
BOOL FN_E DataSort(PDATACFG pcfg)
{
BOOL fResult;
TIMET timetElapsed = time(NULL);
Assert(pcfg);
memset(&g, 0, sizeof(g)); // Zero config data
g.hfTemp = -1; // Initialize variables
g.pcfg = pcfg;
if (!DataSortInitialize()) // Initialize
{
DataSortTerminate();
return FALSE;
}
#if OS_DOS ||OS_OS2
if (g.pcfg->fVerbose)
Output("Press <ESC> to abort processing...\n");
#endif
fResult = DataSortFile(); // Sort data
DataSortTerminate(); // Done
if (g.pcfg->fVerbose)
{
timetElapsed = time(NULL) - timetElapsed;
Output("Elapsed time %ld days, %ld hours, %ld minutes, %ld seconds.\n",
DAYS(timetElapsed), HOURS(timetElapsed),
MINUTES(timetElapsed), SECONDS(timetElapsed));
}
return fResult;
}
//----------------------------------------------------------------------------
// Description: Sort input file.
// Parameters:
// Returns: TRUE if successful.
//----------------------------------------------------------------------------
static BOOL FN_L DataSortFile(void)
{
SHORT sResult;
SIZET cPass;
LONG lRecords;
g.fSorted = TRUE;
for (cPass = 0; cPass < g.cPasses; ++cPass)
{
while (KbdReady())
if (KbdChar() == '\x1B')
{
if (g.pcfg->fVerbose)
Output("\r%79s\rProcessing aborted!\n", "");
return FALSE;
}
if (g.pcfg->fVerbose)
#if OS_UNIX
Output("Processing partition %6u of %u\n", cPass+1, g.cPasses);
#else
Output("\rProcessing partition %6u of %u", cPass+1, g.cPasses);
#endif
g.lStart = (LONG)cPass * (LONG)g.cRecsPerPart;
lRecords = (LONG)g.cRecsPerPart;
g.lEnd = g.lStart + lRecords - 1;
if (g.lEnd >= g.lRecs)
g.lEnd = g.lRecs - 1;
g.plRecord[(SIZET)cPass] = g.lStart;
sResult = DataSortPartition();
if (sResult < 0)
{
return FALSE;
}
else if (sResult == 0)
{
g.fSorted = FALSE;
if (!DataSortWrite())
return FALSE;
}
}
if (g.pcfg->fVerbose)
Output("\rProcessing partition %6u of %u\n", cPass, g.cPasses);
if (g.cPasses > 1) // File was not sorted, so merge it.
{
if (!DataSortMerge())
return FALSE;
}
return TRUE;
}
//----------------------------------------------------------------------------
// Description: Initialize for file sorting
// Parameters:
// Returns: TRUE if successful.
//----------------------------------------------------------------------------
static BOOL FN_L DataSortInitialize(void)
{
SIZET cPartition;
LONG lPasses;
// Open delimited data files
if (!DataDelimitOpen(g.pcfg, &g.dlm)
|| g.dlm.lRecs == 0)
return FALSE;
if (g.dlm.lRecs <= 1)
return TRUE;
strcpy(g.szTemp, g.pcfg->szDelimitNdx);
FnameAppendExt(g.szTemp, "$$$", TRUE);
#if OS_UNIX
cPartition = 8192 _K;
#else
cPartition = (SIZET)63 _K;
#endif
g.cKeySort = DataKeyLen(g.pcfg);
g.cKeyLen = g.cKeySort + sizeof(DLMNDX);
g.lRecs = g.dlm.lRecs;
g.cRecsPerPart = (SIZET) (cPartition / g.cKeyLen);
if ((LONG)g.cRecsPerPart > g.lRecs)
g.cRecsPerPart = (SIZET)g.lRecs;
lPasses = g.lRecs / (LONG)g.cRecsPerPart;
if ((g.lRecs % (LONG)g.cRecsPerPart) != 0)
lPasses++;
if (lPasses > (LONG)g.cRecsPerPart)
{
Error("Partition size is too small.");
return FALSE;
}
g.cPasses = (SIZET)lPasses;
g.plRecord = MemAlloc((SIZET)(g.cPasses * sizeof(LONG)));
g.pbPartition = MemAlloc(g.cRecsPerPart * g.cKeyLen);
g.pbWrite = MemAlloc(sizeof(DLMNDX) * MAX_WRITE_BUF);
g.pcMerge = MemAlloc((SIZET)((g.cPasses + 1) * sizeof(SIZET)));
if (g.plRecord == NULL
|| g.pbPartition == NULL
|| g.pcMerge == NULL
|| g.pbWrite == NULL)
return ErrorNoMem();
g.pbWriteNext = g.pbWrite;
if (g.pcfg->fVerbose)
{
Output("Sorting %ld records\n", g.lRecs);
Output("Key length is %u (%u) bytes\n", g.cKeySort, g.cKeyLen);
Output("%u partitions of %u records\n", g.cPasses, g.cRecsPerPart);
}
return TRUE;
}
//----------------------------------------------------------------------------
// Description: Merge sorted partitions into one index file.
// Parameters:
// Returns: TRUE if successful.
//----------------------------------------------------------------------------
static BOOL FN_L DataSortMerge(void)
{
SIZET cPass;
BOOL fResult;
PBYTE pb1, pb2;
PDLMNDX pdlmndx1,pdlmndx2;
DLMREC dlmrec;
pb1 = g.pbPartition; // Check if file already sorted
pb2 = g.pbPartition + g.cKeyLen;
for (cPass = 1; cPass < g.cPasses; ++cPass)
{
LONG lRec1, lRec2;
lRec2 = g.plRecord[cPass];
pdlmndx2 = DataDelimitReadNdx(&g.dlm, lRec2);
if (pdlmndx2 == NULL
|| DataDelimitReadRecordNdx(&g.dlm, lRec2, pdlmndx2) == NULL)
return FALSE;
// Create key
DataDelimitConvert(&g.dlm, &dlmrec);
DataKey(&dlmrec, (PSZ)pb2);
lRec1 = lRec2 - 1;
pdlmndx1 = DataDelimitReadNdx(&g.dlm, lRec1);
if (pdlmndx1 == NULL
|| DataDelimitReadRecordNdx(&g.dlm, lRec1, pdlmndx1) == NULL) {
return FALSE;
}
DataDelimitConvert(&g.dlm, &dlmrec);
DataKey(&dlmrec, (PSZ)pb1);
if (memcmp(pb1, pb2, g.cKeySort) > 0)
{
g.fSorted = FALSE;
break;
}
}
if (g.fSorted) // If already sorted, early out
return TRUE;
if (!FileOpen(&g.hfTemp, g.szTemp, FL_CF, NULL))
return FALSE;
if (g.pcfg->fVerbose)
Output("\rOpened temporary index file '%s'\n", g.szTemp);
for (cPass = 0; cPass <= g.cPasses; ++cPass)
g.pcMerge[cPass] = MAX_SIZET;
// Preload cache
for (cPass = 0; cPass < g.cPasses; ++cPass)
if (!DataSortMergeRead(cPass, FALSE))
return FALSE;
fResult = DataSortMergeProcess(); // Do the actual merge
DataDelimitClose(&g.dlm); // Rename temporary file to index
FileClose(g.hfTemp); // file name
g.hfTemp = -1;
if (fResult)
{
FnameDelete(g.pcfg->szDelimitNdx);
if (!FnameRename(g.szTemp, g.pcfg->szDelimitNdx))
return FALSE;
}
else if (g.szTemp[0] && FnameIsFile(g.szTemp))
FnameDelete(g.szTemp);
return fResult;
}
//----------------------------------------------------------------------------
// Description: Merge sorted partitions into one index file.
// Parameters:
// Returns: TRUE if successful.
//----------------------------------------------------------------------------
static BOOL FN_L DataSortMergeProcess(void)
{
LONG lRec;
for (lRec = 0; lRec < g.lRecs; ++lRec)
{
SIZET cWhich;
if (lRec == 0 || ((lRec+1) % 1000) == 0)
{
while (KbdReady())
if (KbdChar() == '\x1B')
{
if (g.pcfg->fVerbose)
Output("\r%79s\rProcessing aborted!\n", "");
return FALSE;
}
if (g.pcfg->fVerbose)
#if OS_UNIX
Output("Merging record %8ld of %ld\n", lRec+1, g.lRecs);
#else
Output("\rMerging record %8ld of %ld", lRec+1, g.lRecs);
#endif
}
cWhich = g.pcMerge[0]; // Get lowest available element
Assert(cWhich != MAX_SIZET);
if (!DataSortMergeWrite((PDLMNDX)(g.pbPartition + cWhich * g.cKeyLen + g.cKeySort)))
return FALSE;
// Read next record into cache
if (!DataSortMergeRead(cWhich, TRUE))
return FALSE;
}
if (!DataSortMergeWrite(NULL)) // Flush output
return FALSE;
if (g.pcfg->fVerbose)
Output("\rMerging record %8ld of %ld\n", lRec, g.lRecs);
return TRUE;
}
//----------------------------------------------------------------------------
// Description: Read the next record for a partition.
// been set for each partition.
// Parameters: cPass Which partition
// fInc If true, read next record, else current
// Returns: TRUE if successful.
//----------------------------------------------------------------------------
static BOOL FN_L DataSortMergeRead(SIZET cPass, BOOL fInc)
{
PBYTE pb1, pb2;
PDLMNDX pdlmndx1, pdlmndx2;
LONG lNext;
DLMREC dlmrec;
BOOL fEmpty = FALSE; // Partition empty?
PSIZET pc1, pc2;
pc1 = pc2 = g.pcMerge;
if (fInc)
{
g.plRecord[cPass]++; // Move pointer to next record
// Check if partition is used up
lNext = (LONG)(cPass + 1) * (LONG)g.cRecsPerPart;
if (g.plRecord[cPass] >= lNext
|| g.plRecord[cPass] >= g.lRecs)
{ // No more records in this partition
g.plRecord[cPass] = -1;
fEmpty = TRUE;
}
}
if (!fEmpty)
{
pb1 = g.pbPartition + cPass * g.cKeyLen;
pdlmndx1 = (PDLMNDX)(pb1 + g.cKeySort);
pdlmndx2 = DataDelimitReadNdx(&g.dlm, g.plRecord[cPass]);
if (pdlmndx2 == NULL
|| DataDelimitReadRecordNdx(&g.dlm, g.plRecord[cPass], pdlmndx2) == NULL)
return FALSE;
DataDelimitConvert(&g.dlm, &dlmrec);
DataKey(&dlmrec, (PSZ)pb1);
*pdlmndx1 = *pdlmndx2; // Store index info
//
// Insert entry in table in sorted order!
//
for ( ; *pc2 != MAX_SIZET && *pc2 != cPass; ++pc2)
{
pb2 = g.pbPartition + pc2[0] * g.cKeyLen;
if (memcmp(pb1, pb2, g.cKeySort) <= 0)
{
SIZET cSave = *pc2; // Put in list in the place of current
*pc2++ = cPass; // element
for ( ; *pc2 != MAX_SIZET; ++pc2)
if (*pc2 == cPass)
{
*pc2 = cSave;
return TRUE;
}
else // Propogate change of the list
{ // This happens on an insert only
SIZET cTemp = *pc2;
*pc2 = cSave;
cSave = cTemp;
}
*pc2 = cSave; // Place displaced element at end of list
return TRUE;
}
}
if (*pc2 == MAX_SIZET) // Partition was not in list
{ // Add it to the end of the list
*pc2 = cPass;
return TRUE;
}
//
// We are at the previous sort location of this partition.
// Insert the partition in its new location.
//
pc1 = pc2;
++pc2;
for (; *pc2 != MAX_SIZET; ++pc2)
{
pb2 = g.pbPartition + pc2[0] * g.cKeyLen;
if (memcmp(pb1, pb2, g.cKeySort) <= 0)
break;
*pc1++ = *pc2; // Copy down to make room
}
*pc1 = cPass; // Place element in list
}
else
{
for ( ;*pc2 != MAX_SIZET; ++pc2) // Eliminate partition from merge
if (*pc2 != cPass) // list
*pc1++ = *pc2;
*pc1 = MAX_SIZET;
}
return TRUE;
}
//----------------------------------------------------------------------------
// Description: Write a merge record. Output is buffered
// Parameters: pdlmndx Record to write.
// If null, remaining records are flushed
// Returns: TRUE if successful.
//----------------------------------------------------------------------------
static BOOL FN_L DataSortMergeWrite(PDLMNDX pdlmndx)
{
if (pdlmndx != NULL)
{
memcpy(g.pbWriteNext, (PBYTE)pdlmndx, sizeof(DLMNDX));
g.pbWriteNext += sizeof(DLMNDX);
g.cbWrite++;
}
if (g.cbWrite >= MAX_WRITE_BUF || (pdlmndx == NULL && g.cbWrite > 0))
{
if (!FileWrite(g.hfTemp, g.pbWrite, sizeof(DLMNDX) * g.cbWrite, -1L))
return FALSE;
g.pbWriteNext = g.pbWrite;
g.cbWrite = 0;
}
return TRUE;
}
//----------------------------------------------------------------------------
// Description: Sort a partition
// Parameters:
// Returns: 1 = Already sorted
// 0 = Sorted
// -1 = Failure
//----------------------------------------------------------------------------
static SHORT FN_L DataSortPartition(void)
{
LONG lRec;
SIZET cRecords = (SIZET)(g.lEnd - g.lStart + 1);
BOOL fSorted = TRUE;
PBYTE pb = g.pbPartition;
PDLMNDX pdlmndx1, pdlmndx2;
DLMREC dlmrec;
PBYTE pbLast = NULL;
for (lRec = g.lStart; lRec <= g.lEnd; ++lRec)
{
pdlmndx1 = DataDelimitReadNdx(&g.dlm, lRec);
if (pdlmndx1 == NULL // Read record
|| DataDelimitReadRecordNdx(&g.dlm, lRec, pdlmndx1) == NULL)
return -1;
DataDelimitConvert(&g.dlm, &dlmrec);
DataKey(&dlmrec, (PSZ)pb);
// Store index info
pdlmndx2 = (PDLMNDX)(pb + g.cKeySort);
*pdlmndx2 = *pdlmndx1;
if (pbLast) // Check if out of order
if (memcmp(pbLast, pb, g.cKeySort) > 0)
fSorted = FALSE;
pbLast = pb; // Move to next
pb += g.cKeyLen;
}
if (fSorted) // Already sorted!
return 1;
// Sort the data
qsort(g.pbPartition, cRecords, g.cKeyLen, __keysort__);
return 0;
}
//----------------------------------------------------------------------------
// Description:
// Parameters:
// Returns: TRUE if successful.
//----------------------------------------------------------------------------
static BOOL FN_L DataSortTerminate(void)
{
if (g.hfTemp >= 0) // Close temporary file
FileClose(g.hfTemp);
if (FnameIsFile(g.szTemp)) // Delete temporary file
FnameDelete(g.szTemp);
if (g.plRecord)
MemFree(g.plRecord);
if (g.pbPartition)
MemFree(g.pbPartition);
if (g.pbWrite)
MemFree(g.pbWrite);
if (g.pcMerge)
MemFree(g.pcMerge);
DataDelimitClose(&g.dlm); // Close delimited file
return TRUE;
}
//----------------------------------------------------------------------------
// Description: Write sorted partition to disk
// Parameters:
// Returns: TRUE if successful.
//----------------------------------------------------------------------------
static BOOL FN_L DataSortWrite(void)
{
HF hf;
LONG lRecord;
FPOS fpos;
SIZET cbWrite;
PBYTE pb = g.pbPartition;
PDLMNDX pdlmndx = (PDLMNDX)g.pbPartition;
DataDelimitFlush(&g.dlm); // Flush index cache
for (lRecord = g.lStart; lRecord <= g.lEnd; ++lRecord)
{ // Write new indexes!
*pdlmndx = *(PDLMNDX)(pb + g.cKeySort);
pb += g.cKeyLen;
pdlmndx++;
}
hf = g.dlm.hfDelimitNdx; // Write updated indexes!
fpos = g.lStart * sizeof(DLMNDX);
cbWrite = (SIZET)((g.lEnd - g.lStart + 1) * sizeof(DLMNDX));
return FileWrite(hf, g.pbPartition, cbWrite, fpos);
}
//----------------------------------------------------------------------------
//------------------------------- End of File --------------------------------
//----------------------------------------------------------------------------